options(width = 120)
library(data.table)
library(hash)
library(plotly)
library(arules)
cards = fread(cmd = '7z e -so "jan_2019_games_arenas_1_to_13.7z"', header=TRUE)
cards$player_deck_list <- strsplit(cards$player_deck, "_")
cards$opp_deck_list <- strsplit(cards$opp_deck, "_")
cards[, winner_deck_list := ifelse(has_won, player_deck_list, opp_deck_list)]
head(cards, 1000)
## timestamp arena_id tag opp_tag has_won
## 1: 2019-01-01 00:00:46 8 8L9GQVVUL 9QRV2V888 1
## 2: 2019-01-01 00:02:00 13 92GLVGQU 9PVGG0JUQ 1
## 3: 2019-01-01 00:02:21 12 8RPVVVRPV JP0VC992 1
## 4: 2019-01-01 00:03:12 12 Y0PQGY0R 2L0RUP82R 0
## 5: 2019-01-01 00:03:42 13 92GLVGQU P2L989LCR 1
## ---
## 996: 2019-01-01 11:21:58 11 LL0C02PJ 9VP0R0Y99 0
## 997: 2019-01-01 11:23:05 11 9Y8QRG0JU 9RP0LV222 1
## 998: 2019-01-01 11:23:24 11 RCGGPRUP 9L92VG0G 1
## 999: 2019-01-01 11:23:24 13 RQ8ULCRV GJG0GV0 0
## 1000: 2019-01-01 11:26:17 11 9U09C0RL 8C0J0200 1
## player_deck
## 1: balloon_barbarian-barrel_elixir-collector_fireball_freeze_inferno-tower_skeleton-army_skeletons
## 2: baby-dragon_balloon_barbarian-barrel_bowler_freeze_inferno-dragon_lumberjack_tornado
## 3: goblin-barrel_hog-rider_inferno-tower_minion-horde_prince_rocket_skeleton-army_wizard
## 4: arrows_balloon_electro-wizard_ice-wizard_inferno-dragon_mega-knight_minions_tombstone
## 5: baby-dragon_balloon_barbarian-barrel_bowler_freeze_inferno-dragon_lumberjack_tornado
## ---
## 996: dark-prince_inferno-dragon_pekka_poison_skeleton-army_tombstone_tornado_zap
## 997: bats_electro-wizard_giant-skeleton_goblin-barrel_lumberjack_princess_sparky_zap
## 998: baby-dragon_fireball_fire-spirits_inferno-dragon_mini-pekka_the-log_tombstone_valkyrie
## 999: elite-barbarians_fire-spirits_goblin-barrel_goblin-gang_minion-horde_princess_wizard_zap
## 1000: bats_battle-ram_elixir-collector_giant_goblin-gang_minion-horde_three-musketeers_zap
## opp_deck
## 1: bomb-tower_executioner_ice-wizard_inferno-tower_mortar_rocket_wizard_x-bow
## 2: baby-dragon_bats_dart-goblin_electro-wizard_flying-machine_mega-knight_rocket_the-log
## 3: bandit_battle-ram_electro-wizard_fireball_minions_pekka_royal-ghost_zap
## 4: clone_executioner_fireball_giant-skeleton_miner_mirror_skeleton-army_tornado
## 5: bats_clone_elixir-collector_executioner_golem_night-witch_pekka_valkyrie
## ---
## 996: clone_ice-wizard_mega-knight_princess_royal-ghost_skeleton-army_the-log_three-musketeers
## 997: arrows_bats_giant-skeleton_goblin-hut_inferno-tower_poison_witch_wizard
## 998: dark-prince_elixir-collector_goblins_golem_ice-golem_lumberjack_mega-knight_zap
## 999: arrows_giant-skeleton_goblin-hut_hog-rider_minion-horde_minions_rocket_wizard
## 1000: arrows_balloon_bandit_barbarian-barrel_lumberjack_mega-minion_pekka_wizard
## player_deck_list
## 1: balloon,barbarian-barrel,elixir-collector,fireball,freeze,inferno-tower,...
## 2: baby-dragon,balloon,barbarian-barrel,bowler,freeze,inferno-dragon,...
## 3: goblin-barrel,hog-rider,inferno-tower,minion-horde,prince,rocket,...
## 4: arrows,balloon,electro-wizard,ice-wizard,inferno-dragon,mega-knight,...
## 5: baby-dragon,balloon,barbarian-barrel,bowler,freeze,inferno-dragon,...
## ---
## 996: dark-prince,inferno-dragon,pekka,poison,skeleton-army,tombstone,...
## 997: bats,electro-wizard,giant-skeleton,goblin-barrel,lumberjack,princess,...
## 998: baby-dragon,fireball,fire-spirits,inferno-dragon,mini-pekka,the-log,...
## 999: elite-barbarians,fire-spirits,goblin-barrel,goblin-gang,minion-horde,princess,...
## 1000: bats,battle-ram,elixir-collector,giant,goblin-gang,minion-horde,...
## opp_deck_list
## 1: bomb-tower,executioner,ice-wizard,inferno-tower,mortar,rocket,...
## 2: baby-dragon,bats,dart-goblin,electro-wizard,flying-machine,mega-knight,...
## 3: bandit,battle-ram,electro-wizard,fireball,minions,pekka,...
## 4: clone,executioner,fireball,giant-skeleton,miner,mirror,...
## 5: bats,clone,elixir-collector,executioner,golem,night-witch,...
## ---
## 996: clone,ice-wizard,mega-knight,princess,royal-ghost,skeleton-army,...
## 997: arrows,bats,giant-skeleton,goblin-hut,inferno-tower,poison,...
## 998: dark-prince,elixir-collector,goblins,golem,ice-golem,lumberjack,...
## 999: arrows,giant-skeleton,goblin-hut,hog-rider,minion-horde,minions,...
## 1000: arrows,balloon,bandit,barbarian-barrel,lumberjack,mega-minion,...
## winner_deck_list
## 1: balloon,barbarian-barrel,elixir-collector,fireball,freeze,inferno-tower,...
## 2: baby-dragon,balloon,barbarian-barrel,bowler,freeze,inferno-dragon,...
## 3: goblin-barrel,hog-rider,inferno-tower,minion-horde,prince,rocket,...
## 4: clone,executioner,fireball,giant-skeleton,miner,mirror,...
## 5: baby-dragon,balloon,barbarian-barrel,bowler,freeze,inferno-dragon,...
## ---
## 996: clone,ice-wizard,mega-knight,princess,royal-ghost,skeleton-army,...
## 997: bats,electro-wizard,giant-skeleton,goblin-barrel,lumberjack,princess,...
## 998: baby-dragon,fireball,fire-spirits,inferno-dragon,mini-pekka,the-log,...
## 999: arrows,giant-skeleton,goblin-hut,hog-rider,minion-horde,minions,...
## 1000: bats,battle-ram,elixir-collector,giant,goblin-gang,minion-horde,...
createCandidates <- function(prevCandidates) {
if (length(prevCandidates) == 0) {
return(list())
}
n <- length(prevCandidates[[1]])
result <- list()
for (i in 1:length(prevCandidates)) {
for (j in i:length(prevCandidates)) {
if ((n == 1 || identical(prevCandidates[[i]][1:(n-1)], prevCandidates[[j]][1:(n-1)])) &&
as.character(prevCandidates[[i]][n]) != as.character(prevCandidates[[j]][n])) {
new_list <- c(prevCandidates[[i]], prevCandidates[[j]][n])
result[[length(result)+1]] <- new_list
}
else if (n != 1 && !identical(prevCandidates[[i]][1:(n-1)], prevCandidates[[j]][1:(n-1)])) {
break
}
}
}
result
}
DECK_SIZE=8
HYBRID_SOLUTION_SIZE_THRESHOLD = 4 # the solution is too demanding in terms of time without hybrid solution
apriori <- function(df, support) {
n <- 2 * nrow(df)
player_deck_counts <- table(unlist(c(df$player_deck_list, df$opp_deck_list)))
winner_deck_counts <- table(unlist(df$winner_deck_list))
player_deck_counts_filtered <- player_deck_counts[player_deck_counts >= n * support]
curFreqSet <- lapply(names(player_deck_counts_filtered), function(card) list(card))
res <- mapply(function(x, y) c(itemset = list(x), pick_ratio = y / n, win_ratio = ifelse(is.na(winner_deck_counts[x][[1]]), 0, winner_deck_counts[x][[1]]) / y, size=1, name=x), names(player_deck_counts_filtered), as.list(player_deck_counts_filtered), SIMPLIFY = FALSE)
for (size in 2:HYBRID_SOLUTION_SIZE_THRESHOLD) {
cands = createCandidates(curFreqSet)
cand_counts <- hash()
winner_counts <- hash()
for (cand in cands) {
name <- paste(cand, collapse=", ")
cand_counts[[name]] <- 0
winner_counts[[name]] <- 0
}
lapply(combn(1:DECK_SIZE, size, simplify = FALSE), function(comb) {
player_counts_on_pos <- table(sapply(df$player_deck_list, function(lst) paste(lst[comb], collapse = ", ")))
opp_counts_on_pos <- table(sapply(df$opp_deck_list, function(lst) paste(lst[comb], collapse = ", ")))
winner_counts_on_pos <- table(sapply(df$winner_deck_list, function(lst) paste(lst[comb], collapse = ", ")))
lapply(names(player_counts_on_pos), function(name) {
if(exists(name, cand_counts)) {
cand_counts[[name]] <- cand_counts[[name]] + player_counts_on_pos[[name]]
}
})
lapply(names(opp_counts_on_pos), function(name) {
if(exists(name, cand_counts)) {
cand_counts[[name]] <- cand_counts[[name]] + opp_counts_on_pos[[name]]
}
})
lapply(names(winner_counts_on_pos), function(name) {
if(exists(name, winner_counts)) {
winner_counts[[name]] <- winner_counts[[name]] + winner_counts_on_pos[[name]]
}
})
})
curFreqSet <- lapply(names(cand_counts), function(name) {
picks <- cand_counts[[name]]
wins <- if (exists(name, winner_counts)) winner_counts[[name]] else 0
if (picks >= n * support) {
lst <- unlist(strsplit(name, ", "))
newSet <- list(itemset = lst, pick_ratio = picks / n, win_ratio = wins / picks, size = length(lst), name=name)
return(newSet)
}
else {
return(NULL)
}
})
curFreqSet <- curFreqSet[lengths(curFreqSet) > 0]
res <- c(res, curFreqSet)
curFreqSet <- lapply(curFreqSet, function(x) x$itemset)
}
for (size in (HYBRID_SOLUTION_SIZE_THRESHOLD+1):DECK_SIZE) {
cands = createCandidates(curFreqSet)
curFreqSet <- lapply(cands, function(lst) {
rows <- sum(colSums(sapply(df$player_deck_list, function(deck) lst %in% deck)) == length(lst)) + sum(colSums(sapply(df$opp_deck_list, function(deck) lst %in% deck)) == length(lst))
if (rows >= support * n) {
rows_won <- sum(colSums(sapply(df$winner_deck_list, function(deck) lst %in% deck)) == length(lst))
return(list(itemset = lst, pick_ratio = rows / n, win_ratio = rows_won / rows, size = length(lst), name=paste(lst, collapse=", ")))
} else {
return(NULL)
}
})
curFreqSet <- curFreqSet[lengths(curFreqSet) > 0]
res <- c(res, curFreqSet)
curFreqSet <- lapply(curFreqSet, function(x) x$itemset)
}
res
}
aprioriToDt <- function(aprioriRes) {
res <- rbindlist(lapply(aprioriRes, function(el) {
el$itemset <- el$name
el
}))
res[, itemset:= sapply(res$name, function(name) strsplit(name, ", "))] # rbindlist doesn't transform non-atomic values
res
}
findFreqItems <- function(df, support) {
transactions <- as(c(df$player_deck_list, df$opp_deck_list), "transactions")
frequent_itemsets <- eclat(transactions, parameter = list(support = support))
res <- data.table(as(frequent_itemsets, "data.frame"))
res$itemset <- lapply(strsplit(gsub("[{}]", "", res$items), ",\\s*"), as.character)
res$pick_ratio <- res$support
res$size <- sapply(res$itemset, function(items) length(items))
res$name <- sapply(res$itemset, function(items) paste(items, collapse = ", "))
res$items <- NULL
res$support <- NULL
res$count <- NULL
wins_transactions <- as(df$winner_deck_list, "transactions")
winning_itemsets <- eclat(wins_transactions, parameter = list(support = 0.001))
winners <- data.table(as(winning_itemsets, "data.frame"))
winners$name <- sapply(lapply(strsplit(gsub("[{}]", "", winners$items), ",\\s*"), as.character), function(items) paste(items, collapse = ", "))
winners$wins <- winners$count
winners$items <- NULL
winners$support <- NULL
winners$count <- NULL
res_with_wins <- merge(res, winners, by = "name", all.x = TRUE)
res_with_wins$wins <- ifelse(is.na(res_with_wins$wins), 0, res_with_wins$wins)
n <- 2 * nrow(df)
res_with_wins$win_ratio <- res_with_wins$wins / res_with_wins$pick_ratio / n
res_with_wins
}
SUPPORT = 0.01
freqItemsDt = findFreqItems(cards, SUPPORT)
head(freqItemsDt, 1000)
## name itemset pick_ratio size wins win_ratio
## 1: archers archers 0.06939363 1 1131063 0.5052745
## 2: archers, arrows archers,arrows 0.01331315 2 215490 0.5017720
## 3: archers, baby-dragon archers,baby-dragon 0.01183210 2 195513 0.5122406
## 4: archers, fireball archers,fireball 0.02198782 2 355750 0.5015600
## 5: archers, hog-rider archers,hog-rider 0.01537065 2 256442 0.5171985
## ---
## 996: ice-wizard, the-log ice-wizard,the-log 0.02138866 2 346552 0.5022791
## 997: ice-wizard, tornado ice-wizard,tornado 0.01168411 2 184422 0.4893024
## 998: ice-wizard, valkyrie ice-wizard,valkyrie 0.02149111 2 350703 0.5058722
## 999: ice-wizard, witch ice-wizard,witch 0.01408483 2 225032 0.4952823
## 1000: ice-wizard, wizard ice-wizard,wizard 0.02463654 2 396525 0.4989430
createTop100 <- function(freqItemsDt, df) {
n <- 2 * nrow(df)
top100 <- freqItemsDt[order(-pick_ratio), .SD[1:100], by=size]
top100 <- top100[pick_ratio > 0] # filtering NA's
top100
}
genPickRatioPlot <- function(df) {
plot_ly(df, x=~factor(name,levels = unique(name[order(size, -pick_ratio)])), y=~pick_ratio, color=~factor(size, levels = unique(size)), hovertemplate="Itemset: %{x}<br>Pick Ratio: %{y}<br>", type="bar") %>% layout(xaxis = list(title="Itemset", showticklabels = FALSE), yaxis = list(title="Pick Ratio")) %>% layout(title="Pick Ratio by Itemset")
}
genAnimatedPickRatioPlot <- function(df, frame) {
plot_ly(df, x=~factor(name,levels = unique(name[order(size, -pick_ratio)])), y=~pick_ratio, color=~factor(size, levels = unique(size)), frame=frame, hovertemplate="Itemset: %{x}<br>Pick Ratio: %{y}<br>", type="bar") %>% layout(xaxis = list(title="Itemset", showticklabels = FALSE), yaxis = list(title="Pick Ratio")) %>% layout(title="Pick Ratio by Itemset") %>%
animation_opts(
500, redraw = FALSE
)
}
genWinRatioPlot <- function(df) {
plot_ly(df, x=~factor(name,levels = unique(name[order(size, -win_ratio)])), y=~win_ratio, color=~factor(size, levels = unique(size)), hovertemplate="Itemset: %{x}<br>Win Ratio: %{y}<br>", type="bar") %>% layout(xaxis = list(title="Itemset", showticklabels = FALSE), yaxis = list(title="Win Ratio")) %>% layout(title="Win Ratio by Itemset")
}
genAnimatedWinRatioPlot <- function(df, frame) {
plot_ly(df, x=~factor(name,levels = unique(name[order(size, -win_ratio)])), y=~win_ratio, color=~factor(size, levels = unique(size)), frame=frame, hovertemplate="Itemset: %{x}<br>Win Ratio: %{y}<br>", type="bar") %>% layout(xaxis = list(title="Itemset", showticklabels = FALSE), yaxis = list(title="Win Ratio")) %>% layout(title="Win Ratio by Itemset") %>%
animation_opts(
500, redraw = FALSE
)
}
top100 <- createTop100(freqItemsDt, cards)
genPickRatioPlot(top100)
genWinRatioPlot(top100)
freqItemsByArena <- lapply(unique(cards$arena_id), function(arenaId) {
fromArena <- cards[arena_id == arenaId]
freqFromArenaDt <- findFreqItems(fromArena, SUPPORT)
freqFromArenaDt[, arena_id := arenaId]
top100Arenas <- createTop100(freqFromArenaDt, fromArena)
top100Arenas
})
freqItemsByArena <- rbindlist(freqItemsByArena)
genAnimatedPickRatioPlot(freqItemsByArena, ~arena_id)
genAnimatedWinRatioPlot(freqItemsByArena, ~arena_id)
cards$day <- as.Date(cards$timestamp)
freqItemsByDay <- lapply(unique(cards$day), function(dayId) {
fromDay <- cards[day == dayId]
freqFromDayDt <- findFreqItems(fromDay, SUPPORT)
freqFromDayDt[, day := as.character(dayId)]
top100Days <- createTop100(freqFromDayDt, fromDay)
top100Days
})
freqItemsByDay <- rbindlist(freqItemsByDay)
genAnimatedPickRatioPlot(freqItemsByDay, ~day)
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to replace is not a multiple of replacement
## length
genAnimatedWinRatioPlot(freqItemsByDay, ~day)
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to replace is not a multiple of replacement
## length
findAssociacions <- function(cur_itemset, candidates, support) {
item_pick_ratio = cur_itemset$pick_ratio
max_pick_ratio = item_pick_ratio / support # support(cand => itemset) = item_pick_ratio / cand_pick_ratio >= support
candidates = candidates[pick_ratio <= max_pick_ratio]
cur_items <- cur_itemset$itemset
candidates_subsets <- candidates[sapply(itemset, function(items) all(items %in% cur_items) && !all(cur_items %in% items))]
mapply(function(pick_ratio, name, items) list(left_side_pick_ratio=pick_ratio, left_side=name, right_side=toString(setdiff(cur_items, items)), right_side_support=item_pick_ratio / pick_ratio), candidates_subsets$pick_ratio, candidates_subsets$name, candidates_subsets$itemset, SIMPLIFY = FALSE)
}
x <- apply(top100, MARGIN=1, function(itemset) findAssociacions(itemset, freqItemsDt, 0.33))
result <- list()
for (res_list in x) {
result <- c(result, res_list)
}
result <- rbindlist(result)
head(result[order(right_side_support, decreasing = TRUE)], 1000)
## left_side_pick_ratio left_side right_side right_side_support
## 1: 0.01552924 goblin-barrel, princess, the-log goblin-gang 0.7335815
## 2: 0.01556765 goblin-gang, princess, the-log goblin-barrel 0.7317716
## 3: 0.01585979 goblin-barrel, goblin-gang, the-log princess 0.7182921
## 4: 0.03254201 pekka, skeleton-army wizard 0.6656010
## 5: 0.01946923 balloon, minion-horde, valkyrie wizard 0.6644779
## ---
## 340: 0.12428474 rage witch 0.3347826
## 341: 0.31925937 zap wizard 0.3335369
## 342: 0.16561370 arrows valkyrie 0.3328076
## 343: 0.12264618 skeleton-army, valkyrie baby-dragon 0.3317739
## 344: 0.06534889 prince, skeleton-army witch 0.3304479
hover_text <- paste(
"<b>Left Side:</b> ", result$left_side, "<br>",
"<b>Left Side Pick Ratio:</b> ", result$left_side_pick_ratio, "<br>",
"<b>Right Side:</b> ", result$right_side, "<br>",
"<b>Right Side Support:</b> ", result$right_side_support, "<br>"
)
plot_ly(result, x = ~left_side_pick_ratio, y = ~right_side_support, type = "scatter", mode = "markers",
hovertemplate = hover_text, marker = list(size = 10, color = "blue"))
exploded <- cards[, .(tag, card = unlist(player_deck_list)), by = seq_len(nrow(cards))]
usage <- dcast(exploded, tag ~ card, fun.aggregate = length)
head(usage)
## tag archers arrows baby-dragon balloon bandit barbarian-barrel barbarian-hut barbarians bats battle-ram
## 1: 200000Y9R 0 0 0 0 0 0 0 0 0 3
## 2: 20000RGC2 0 0 0 0 0 0 0 0 0 0
## 3: 20002GUC 0 0 1 0 0 1 0 0 0 0
## 4: 20002JPV9 0 0 0 0 17 0 0 0 0 0
## 5: 20002Q9CV 0 0 0 0 0 0 0 0 0 0
## 6: 20002RUGP 0 0 0 0 0 0 0 0 0 0
## bomb-tower bomber bowler cannon cannon-cart clone dark-prince dart-goblin electro-dragon electro-wizard
## 1: 0 0 0 0 2 5 2 0 0 0
## 2: 0 0 0 0 0 0 0 0 0 0
## 3: 0 0 0 0 0 0 0 0 0 0
## 4: 0 0 0 0 0 0 0 0 0 0
## 5: 0 0 0 0 0 0 0 0 0 8
## 6: 0 0 0 0 0 0 0 0 0 0
## elite-barbarians elixir-collector executioner fire-spirits fireball flying-machine freeze furnace giant
## 1: 0 0 2 0 0 0 0 0 0
## 2: 0 0 0 0 0 0 0 0 0
## 3: 0 0 0 0 0 0 0 0 0
## 4: 0 0 0 0 17 0 0 0 0
## 5: 0 0 0 0 0 0 0 8 0
## 6: 0 0 0 0 0 0 0 0 0
## giant-skeleton giant-snowball goblin-barrel goblin-gang goblin-giant goblin-hut goblins golem graveyard guards heal
## 1: 2 0 2 3 0 0 0 0 0 0 0
## 2: 0 0 47 47 0 0 0 0 0 0 0
## 3: 1 0 0 0 0 0 0 0 0 0 0
## 4: 0 0 17 0 0 0 0 0 0 0 0
## 5: 8 0 0 0 0 0 0 0 0 0 0
## 6: 0 0 0 1 0 0 0 0 0 0 0
## hog-rider hunter ice-golem ice-spirit ice-wizard inferno-dragon inferno-tower knight lava-hound lightning lumberjack
## 1: 0 0 0 0 0 0 0 0 0 0 0
## 2: 0 0 0 47 0 0 0 0 0 0 0
## 3: 1 0 0 0 0 0 1 0 0 0 0
## 4: 0 0 0 0 0 0 0 0 0 0 0
## 5: 0 0 0 0 0 8 0 0 0 0 0
## 6: 1 0 0 0 0 0 0 1 0 0 1
## magic-archer mega-knight mega-minion miner mini-pekka minion-horde minions mirror mortar musketeer night-witch pekka
## 1: 0 0 0 0 0 0 0 0 0 3 3 3
## 2: 0 0 0 0 0 0 0 0 0 0 0 0
## 3: 0 0 0 0 0 0 1 0 0 0 0 0
## 4: 0 0 0 0 0 17 0 0 0 0 0 17
## 5: 8 8 0 0 0 0 0 0 0 0 0 0
## 6: 0 0 0 0 0 1 0 0 0 0 0 1
## poison prince princess rage ram-rider rascals rocket royal-ghost royal-giant royal-hogs royal-recruits skeleton-army
## 1: 0 0 0 0 0 0 0 0 0 0 0 2
## 2: 0 47 47 0 0 47 47 0 0 0 0 0
## 3: 1 0 0 0 0 0 0 0 0 0 0 0
## 4: 0 0 0 0 0 0 0 17 0 0 0 0
## 5: 0 0 0 0 0 0 0 8 0 0 0 0
## 6: 0 0 1 0 0 0 0 0 0 0 0 0
## skeleton-barrel skeletons sparky spear-goblins tesla the-log three-musketeers tombstone tornado valkyrie witch
## 1: 0 0 0 0 0 3 0 0 0 0 2
## 2: 0 0 0 0 0 47 0 0 0 0 0
## 3: 0 0 0 0 0 0 0 0 0 0 1
## 4: 0 0 0 0 0 0 0 0 0 0 17
## 5: 0 0 0 0 0 0 0 0 0 0 8
## 6: 0 0 0 0 0 0 0 0 0 0 0
## wizard x-bow zap zappies
## 1: 0 0 3 0
## 2: 0 0 0 0
## 3: 0 0 0 0
## 4: 17 0 0 0
## 5: 0 0 0 0
## 6: 0 0 1 0
Performing SVD decomposition on recommendation table:
usage_matrix <- as.matrix(usage[, -1])
svd_res <- svd(usage_matrix)
U <- svd_res$u
S <- diag(svd_res$d)
V <- svd_res$v
Grouping players based on the most dominant concept:
K = ceiling(nrow(S) / 2)
U <- U[1:K, ]
S <- S[1:K, 1:K]
V <- V[, 1:K]
groups <- max.col(apply(U, c(1,2), abs))
user_concept <- head(data.table(userId = usage$tag, cluster_number = groups), 1000)
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names = check.names, : Item 2 has 45 rows but
## longest item has 911927; recycled with remainder.
plot_ly(user_concept, x=~userId, y=~cluster_number, type = "scatter", color=~cluster_number) %>%
layout(xaxis = list(categoryorder = "total ascending"))
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
cards_number = nrow(V)
create_remcommendations <- function(chosen_cards) {
chosen_cards_idxs <- sapply(chosen_cards, function(card) which(names(usage) == card))
preferences <- rep(0, cards_number)
preferences[chosen_cards_idxs] <- 1
concept_preferences <- preferences %*% V
preference_prediction <- concept_preferences %*% t(V)
preference_prediction <- apply(preference_prediction, c(1,2), abs)
names(preference_prediction) <- colnames(usage)[-1] # cut 'tag' colname
setdiff(names(sort(preference_prediction, decreasing = TRUE)), chosen_cards)[1:4]
}
create_remcommendations(c("balloon", "baby-dragon", "archers", "arrows"))
## [1] "bandit" "mega-minion" "poison" "royal-ghost"